import itertools

import numpy as np

from src.clustering.base_distances_bis import (
    norm_hamming,
    jaccard,
    rms_jaccard,
    geom_jaccard,
    geom_hamming,
    rms_hamming
)
from src.converters.convert_votes_to_vecs import (
    convert_profiles_to_vec_profiles
)
from src.imports.import_pabulib import import_pabulib_files_from_folder

# Registry of the vote-based distance functions under comparison, keyed by
# the label printed next to each result. Insertion order is the print order.
base_distances = {
    # Jaccard-named variants
    "geom_jaccard": geom_jaccard,
    "jaccard": jaccard,
    "rms_jaccard": rms_jaccard,
    # Hamming-named variants
    "geom_hamming": geom_hamming,
    "norm_hamming": norm_hamming,
    "rms_hamming": rms_hamming,
}

def _project_coordinates(instance, project_ids):
    """Return ``{project_id: (longitude, latitude)}`` for geolocated projects.

    Coordinates are read from ``instance.project_meta[project_id]``. A project
    whose ``longitude`` or ``latitude`` entry is the empty string is left out.
    """
    coordinates = {}
    for project_id in project_ids:
        meta = instance.project_meta[project_id]
        longitude, latitude = meta['longitude'], meta['latitude']
        if longitude != '' and latitude != '':
            coordinates[project_id] = (float(longitude), float(latitude))
    return coordinates


def _pairwise_distances(votes, project_ids, coordinates, distances):
    """Collect geographic and vote-based distances for every project pair.

    Args:
        votes: 2-D array in which ``votes[:, k]`` is the vector of the k-th
            project.
        project_ids: Project identifiers, in the same order as the columns
            of ``votes``.
        coordinates: Mapping ``project_id -> (x, y)``; pairs with a project
            missing from it are skipped.
        distances: Mapping ``label -> callable(vec_a, vec_b)``.

    Returns:
        ``(geo, by_label)`` where ``geo`` is the list of Euclidean distances
        and ``by_label[label]`` is the list of ``distances[label]`` values for
        the same pairs, in the same order.
    """
    geo = []
    by_label = {label: [] for label in distances}
    for i, j in itertools.combinations(range(len(project_ids)), 2):
        i_id, j_id = project_ids[i], project_ids[j]
        if i_id not in coordinates or j_id not in coordinates:
            continue
        x1, y1 = coordinates[i_id]
        x2, y2 = coordinates[j_id]
        # Longitude/latitude are treated as plain planar coordinates here.
        geo.append(np.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2))
        for label, distance in distances.items():
            by_label[label].append(distance(votes[:, i], votes[:, j]))
    return geo, by_label


def _finite_mean(values):
    """Return the mean of the finite entries of ``values`` (NaN if none)."""
    values = np.asarray(values, dtype=float)
    finite = values[np.isfinite(values)]
    return finite.mean() if finite.size else float('nan')


def main():
    """Print, per target folder, how vote-based distances track geography.

    For every instance of every target, the Pearson correlation between the
    Euclidean distance of project coordinates and each distance in
    ``base_distances`` is computed over all pairs of geolocated projects.
    Per target, the mean correlation of each distance is printed, followed by
    the fraction of instances in which the ``jaccard`` correlation is below
    (resp. at most) the ``geom_jaccard`` correlation.
    """
    targets = ['warszawa_2019_districts',
               'warszawa_2020_districts',
               'warszawa_2021_districts',
               'warszawa_2022_districts',
               'warszawa_2023_districts',
               'warszawa_2024_districts',
               'wieliczka']

    for target in targets:

        instances, profiles = import_pabulib_files_from_folder(f'data/pabulib/{target}')

        vec_profiles, names = convert_profiles_to_vec_profiles(profiles)

        # One correlation per instance and per distance; the lists stay
        # aligned across distances so they can be compared instance by
        # instance below.
        correlations = {bd: [] for bd in base_distances}

        for instance_id in instances:

            # Transpose so that column k holds the vector of project k.
            votes = np.transpose(np.array(vec_profiles[instance_id]))

            num_projects = len(vec_profiles[instance_id])
            project_ids = [names[instance_id][i] for i in range(num_projects)]

            coordinates = _project_coordinates(instances[instance_id], project_ids)
            geo, by_label = _pairwise_distances(
                votes, project_ids, coordinates, base_distances
            )

            # A correlation needs at least two data points; with fewer,
            # np.corrcoef only yields NaN (plus runtime warnings).
            if len(geo) < 2:
                continue

            # Pearson correlation coefficient between geography and votes.
            for bd in base_distances:
                correlations[bd].append(np.corrcoef(geo, by_label[bd])[0][1])

        print("\n")

        # NaN correlations (e.g. from a constant distance vector) are left
        # out of the average instead of turning the whole mean into NaN.
        for bd in base_distances:
            print(bd, round(_finite_mean(correlations[bd]), 3))

        print("\n")
        # Fraction of instances in which the 'jaccard' correlation (A) is
        # below, resp. not above, the 'geom_jaccard' correlation (B). Only
        # instances where both correlations are finite are counted.
        jaccard_corr = np.asarray(correlations['jaccard'], dtype=float)
        geom_jaccard_corr = np.asarray(correlations['geom_jaccard'], dtype=float)
        comparable = np.isfinite(jaccard_corr) & np.isfinite(geom_jaccard_corr)
        A = jaccard_corr[comparable]
        B = geom_jaccard_corr[comparable]
        if len(A):
            print("Mean A < B: ", np.sum(A < B) / len(A))
            print("Mean A <= B: ", np.sum(A <= B) / len(A))
        else:
            # Nothing to compare: report NaN rather than dividing by zero.
            print("Mean A < B: ", float('nan'))
            print("Mean A <= B: ", float('nan'))


if __name__ == "__main__":
    main()
